{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6b9660d4",
   "metadata": {},
   "source": [
    "## Hyplus File Analysis\n",
    "\n",
    "All tool functions are made by Akira37 from Hyperplasma. Keep in mind that all relative paths start from this file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "03c03acc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "acf7b715",
   "metadata": {},
   "source": [
    "### Analyze Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2eedbbfe",
   "metadata": {},
   "outputs": [],
   "source": [
    "def analyze_dataset(data_path):\n",
    "    \"\"\"\n",
    "    分析数据集（csv文件）\n",
    "    \"\"\"\n",
    "    if not os.path.exists(data_path):\n",
    "        print(f\"[Error] Dataset file not found: {data_path}\")\n",
    "        return\n",
    "    print(f\"\\nDataset path: {data_path}\")\n",
    "\n",
    "    # 只读取前1000行，防止大文件卡死\n",
    "    try:\n",
    "        df = pd.read_csv(data_path, encoding='utf-8')\n",
    "    except UnicodeDecodeError:\n",
    "        df = pd.read_csv(data_path, encoding='gbk')\n",
    "    except Exception as e:\n",
    "        print(f\"[Error] Failed to read CSV: {e}\")\n",
    "        return\n",
    "\n",
    "    print(f\"\\nShape: {df.shape}\")\n",
    "    print(f\"\\nColumns: {list(df.columns)}\")\n",
    "    print(\"\\nDtypes:\")\n",
    "    print(df.dtypes)\n",
    "    print(\"\\nMissing values per column:\")\n",
    "    print(df.isnull().sum())\n",
    "    print(\"\\nDescriptive statistics:\")\n",
    "    print(df.describe(include='all').T)\n",
    "\n",
    "    print(\"\\nUnique value count per column:\")\n",
    "    for col in df.columns:\n",
    "        nunique = df[col].nunique(dropna=False)\n",
    "        print(f\"  {col}: {nunique}\")\n",
    "\n",
    "    print(\"\\nFirst 5 rows:\")\n",
    "    print(df.head())\n",
    "    print(\"\\nLast 5 rows:\")\n",
    "    print(df.tail())\n",
    "\n",
    "\n",
    "data_path=\"../dataset/weather.csv\"\n",
    "analyze_dataset(data_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "161c3472",
   "metadata": {},
   "source": [
    "### Analyze Npy File\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f84bdc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def analyze_npy_file(npy_path):\n",
    "    \"\"\"\n",
    "    分析npy文件内容，输出shape、dtype、min、max、mean、std、前后各5个样本内容（如为多维则只展示部分切片）。\n",
    "    \"\"\"\n",
    "    if not os.path.exists(npy_path):\n",
    "        print(f\"[Error] npy文件不存在: {npy_path}\")\n",
    "        return\n",
    "    arr = np.load(npy_path)\n",
    "    print(f\"\\nFile: {npy_path}\")\n",
    "    print(f\"Shape: {arr.shape}\")\n",
    "    print(f\"Dtype: {arr.dtype}\")\n",
    "    print(f\"Min: {arr.min():.4f}, Max: {arr.max():.4f}\")\n",
    "    print(f\"Mean: {arr.mean():.4f}, Std: {arr.std():.4f}\")\n",
    "    # 展示前后5个样本\n",
    "    n = arr.shape[0] if arr.ndim > 0 else 1\n",
    "    print(\"\\nFirst 5 samples:\")\n",
    "    if arr.ndim == 1:\n",
    "        print(arr[:5])\n",
    "    elif arr.ndim == 2:\n",
    "        print(arr[:5, :])\n",
    "    elif arr.ndim == 3:\n",
    "        print(arr[:5, :, :])\n",
    "    else:\n",
    "        print(arr[:5])\n",
    "    print(\"\\nLast 5 samples:\")\n",
    "    if arr.ndim == 1:\n",
    "        print(arr[-5:])\n",
    "    elif arr.ndim == 2:\n",
    "        print(arr[-5:, :])\n",
    "    elif arr.ndim == 3:\n",
    "        print(arr[-5:, :, :])\n",
    "    else:\n",
    "        print(arr[-5:])\n",
    "\n",
    "\n",
    "# npy_path=\"../outputs/PatchTST/weather/preds_weather_inv.npy\"\n",
    "# npy_path=\"../outputs/PatchTST/weather/trues_weather_inv.npy\"\n",
    "# npy_path = \"../outputs/PatchTST/weather/preds_weather_inv.npy\"\n",
    "npy_path = \"../outputs/PatchTST/weather/trues_weather_inv.npy\"\n",
    "analyze_npy_file(npy_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "84fc3d09",
   "metadata": {},
   "source": [
    "### Print File Tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aff0ae95",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_file_tree(root_path, max_files_per_level=30, prefix=\"\"):\n",
    "    \"\"\"\n",
    "    打印文件夹树结构，每层最多显示max_files_per_level个文件，多余用\"...\"隐藏。\n",
    "    不显示隐藏文件（以.开头）。\n",
    "    :param root_path: 根目录路径\n",
    "    :param max_files_per_level: 每层最多显示的文件/文件夹数\n",
    "    :param prefix: 前缀（递归用）\n",
    "    \"\"\"\n",
    "    try:\n",
    "        items = sorted([f for f in os.listdir(root_path) if not f.startswith('.')])\n",
    "    except Exception as e:\n",
    "        print(prefix + \"[无法访问]\")\n",
    "        return\n",
    "\n",
    "    count = 0\n",
    "    for i, name in enumerate(items):\n",
    "        if count >= max_files_per_level:\n",
    "            print(prefix + \"...\")\n",
    "            break\n",
    "        path = os.path.join(root_path, name)\n",
    "        connector = \"├── \" if i < len(items) - 1 else \"└── \"\n",
    "        print(prefix + connector + name)\n",
    "        if os.path.isdir(path):\n",
    "            new_prefix = prefix + (\"│   \" if i < len(items) - 1 else \"    \")\n",
    "            print_file_tree(path, max_files_per_level, new_prefix)\n",
    "        count += 1\n",
    "\n",
    "\n",
    "root_path = \"..\"\n",
    "print_file_tree(root_path, max_files_per_level=15)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pytorch",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
